from datetime import datetime
%run startup.ipy
import pandas as pd
pd.set_option('display.max_colwidth', -1)
import matplotlib.pyplot as plt
%matplotlib inline
sql(""" SELECT a.name, COUNT(*) AS cnt -- , w.name FROM work AS w JOIN l_artist_work AS law ON w.id = law.entity1 JOIN artist AS a ON a.id = law.entity0 JOIN link AS l ON l.id = law.link WHERE l.link_type = 168 AND a.area=98 GROUP BY a.name ORDER BY cnt DESC ; """)
# lib/MusicBrainz/Server/Constants.pm
EDIT_MEDIUM_CREATE = 51
EDIT_RECORDING_CREATE = 71
EDIT_RECORDING_EDIT = 72
EDIT_RECORDING_DELETE = 73
EDIT_RECORDING_MERGE = 74
def current_recording_count(artist_name):
return sql("""
SELECT COUNT(*) AS cnt
FROM artist AS a
JOIN artist_credit_name AS acn ON a.id = acn.artist
JOIN artist_credit AS ac ON ac.id = acn.artist_credit
JOIN recording AS r ON r.artist_credit = ac.id
WHERE a.name = %(artist_name)s
""", artist_name=artist_name).cnt[0]
def edits_from_new_medium(artist_name):
artist_id = sql("SELECT id FROM artist WHERE name=%(artist_name)s",
artist_name=artist_name).id[0]
df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
edit.id AS new,
edit_data.data->'tracklist' AS tracklist,
editor.name AS editor
FROM artist
JOIN edit_artist ON artist.id = edit_artist.artist
JOIN edit ON edit.id = edit_artist.edit
JOIN edit_data ON edit_data.edit = edit_artist.edit
JOIN editor ON editor.id = edit.editor
WHERE artist.name = %(artist_name)s
AND edit.type = %(EDIT_TYPE)s
;""", artist_name=artist_name, EDIT_TYPE=EDIT_MEDIUM_CREATE)
df.index = df['date'].apply(lambda d: d.date())
del df['date']
df['new'] = df.tracklist.apply(lambda tl: [t['artist_credit']['names'][0]['artist']['id']
for t in tl].count(artist_id))
del df['tracklist']
return df
def edits_from_standalone_recordings(artist_name):
df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
edit.id AS standalone,
editor.name AS editor
FROM artist
JOIN edit_artist ON artist.id = edit_artist.artist
JOIN edit ON edit.id = edit_artist.edit
JOIN edit_data ON edit_data.edit = edit_artist.edit
JOIN editor ON editor.id = edit.editor
WHERE artist.name = %(artist_name)s
AND edit.type = %(EDIT_TYPE)s
;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_CREATE)
df.index = df['date'].apply(lambda d: d.date())
del df['date']
return df
def edits_from_recordings_changed_from(artist_name):
df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
edit.id AS changed_from,
CASE WHEN edit_note.text ILIKE E'%%\r\nGM script:%%' THEN 1
ELSE 0
END AS userscript,
editor.name AS editor
FROM artist
JOIN edit_artist ON artist.id = edit_artist.artist
JOIN edit ON edit.id = edit_artist.edit
JOIN edit_data ON edit_data.edit = edit_artist.edit
JOIN editor ON editor.id = edit.editor
JOIN edit_note ON edit_note.edit = edit.id
WHERE artist.name = %(artist_name)s
AND edit.type = %(EDIT_TYPE)s
AND artist.id = CAST(edit_data.data->'old'->'artist_credit'->'names'->0->'artist'->>'id' AS integer)
;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_EDIT)
df.index = df['date'].apply(lambda d: d.date())
del df['date']
return df
def edits_from_recordings_changed_to(artist_name):
df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
edit.id AS changed_to,
editor.name AS editor
FROM artist
JOIN edit_artist ON artist.id = edit_artist.artist
JOIN edit ON edit.id = edit_artist.edit
JOIN edit_data ON edit_data.edit = edit_artist.edit
JOIN editor ON editor.id = edit.editor
WHERE artist.name = %(artist_name)s
AND edit.type = %(EDIT_TYPE)s
AND artist.id = CAST(edit_data.data->'new'->'artist_credit'->'names'->0->'artist'->>'id' AS integer)
;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_EDIT)
df.index = df['date'].apply(lambda d: d.date())
del df['date']
return df
def edits_from_recordings_merge(artist_name):
df = sql("""
SELECT date_trunc('day', edit.open_time) AS date,
edit.id AS merged,
editor.name AS editor
FROM artist
JOIN edit_artist ON artist.id = edit_artist.artist
JOIN edit ON edit.id = edit_artist.edit
JOIN edit_data ON edit_data.edit = edit_artist.edit
JOIN editor ON editor.id = edit.editor
WHERE artist.name = %(artist_name)s
AND edit.type = %(EDIT_TYPE)s
;""", artist_name=artist_name, EDIT_TYPE=EDIT_RECORDING_MERGE)
df.index = df['date'].apply(lambda d: d.date())
del df['date']
return df
sql(""" SELECT a.name, COUNT(*) AS cnt -- , w.name FROM work AS w JOIN l_artist_work AS law ON w.id = law.entity1 JOIN artist AS a ON a.id = law.entity0 JOIN link AS l ON l.id = law.link WHERE l.link_type = 168 AND a.begin_date_year < 1700 -- AND a.area=98 GROUP BY a.name ORDER BY cnt DESC LIMIT 10 ; """)
def recording_evolution(artist_name, editor='loujin'):
current = current_recording_count(artist_name)
df_new = edits_from_new_medium(artist_name)
df_standalone = edits_from_standalone_recordings(artist_name)
df_changed_to = edits_from_recordings_changed_to(artist_name)
df_changed_from = edits_from_recordings_changed_from(artist_name)
df_merged = edits_from_recordings_merge(artist_name)
df = pd.concat([df_new.new.groupby('date').count(),
df_standalone.standalone.groupby('date').count(),
df_changed_to.changed_to.groupby('date').count(),
- df_changed_from.changed_from.groupby('date').count(),
- df_merged.merged.groupby('date').count(),
], axis=1, sort=True).fillna(0)
df['total'] = (df.new + df.standalone + df.changed_to + df.changed_from + df.merged).cumsum()
df['total'] += current - df['total'][-1]
df['userscript'] = - df_changed_from.userscript.groupby('date').count()
df['userscript'] = df['userscript'].fillna(0)
df['total_userscript'] = df.userscript.cumsum()
df['total_userscript'] += current - df['total_userscript'][-1]
df_editor = pd.concat([df_new.new[df_new.editor == editor].groupby('date').count(),
df_standalone.standalone[df_standalone.editor == editor].groupby('date').count(),
df_changed_to.changed_to[df_changed_to.editor == editor].groupby('date').count(),
df_changed_from.changed_from[df_changed_from.editor == editor].groupby('date').count(),
df_merged.merged[df_merged.editor == editor].groupby('date').count(),
], axis=1, sort=True).fillna(0)
df[editor] = (df_editor.new + df_editor.standalone + df_editor.changed_to
- df_editor.changed_from - df_editor.merged)
df['total_' + editor] = df[editor].fillna(0).cumsum()
df['total_' + editor] += current - df['total_' + editor][-1]
iplot({
'data': [{
'x': df.index,
'y': df[col],
'name': col,
} for col in df.columns],
#'data': [{'x': [1,2], 'y': [3,4]}],
'layout': {'title': artist_name}
}, show_link=False)
#fig = plt.figure()
#df.total.plot(label='total')
#df.userscript.plot(style='ks', label='userscript')
#df[editor].plot(style='ko', label='my contribution')
#fig.legend()
#fig.suptitle(artist_name)
return df
leifs = recording_evolution('Jón Leifs')
webern = recording_evolution('Anton Webern')
ravel = recording_evolution('Maurice Ravel')
debussy = recording_evolution('Claude Debussy')
chopin = recording_evolution('Fryderyk Chopin')
bach = recording_evolution('Johann Sebastian Bach')
brahms = recording_evolution('Johannes Brahms')
beethoven = recording_evolution('Ludwig van Beethoven')
schubert = recording_evolution('Franz Schubert')
dvorak = recording_evolution('AntonÃn Dvořák')